################################################################### 
##    Project: Measuring Media Freedom: An IRT Analysis           #
##    Authors:    Jonathan A. Solis        Philip D. Waggoner     #
##             (jsolis@aiddata.wm.edu)  (pdwaggoner@uchicago.edu) #
##    Journal: British Journal of Political Science (2020)        #                              *
##    Task: Replicate MSF scores; manuscript figures 1-5 in       #
##    Software: R version 3.6.0                                   #
##    Date: 01/27/2020                                            #
################################################################### 

# Clear environment and set working directory 
rm(list=ls())
setwd(" ")

# Packages to create MSF scores
library(R2jags)
library(coda)
library(rstan)
library(StanHeaders)
library(ggplot2)
library(lattice)
library(rjags)
# Packages to create map (figure 3, manuscript)
library(sp)
library(rworldmap)
library(RColorBrewer)

######################
# I. CREATE MSF DATA #
######################

# Note: This code (I.) will create the MSF Data. It took ~4 hours to run on a small, personal  
#       laptop. We recommend utilizing high-performance computing (HPC) to perform the task 
#       faster, especially if you wish to increase the iterations per chain (as we did with 
#       several robustness checks).

saveplots <- FALSE

recode <- function(x) {
  y <- rep(NA, length(x))
  nt <- as.numeric(names(table(x)))
  for (i in 1:length(nt)) {
    y[x==nt[i]] <- i
  }
  return(y)
}

cymat <- function(x, cnum, yr) {
  ret <- matrix(NA, nrow=length(table(cnum)), ncol=length(table(yr)))
  v.cnum <- as.numeric(names(table(cnum)))
  v.year <- as.numeric(names(table(yr)))
  for (i in 1:nrow(ret)) {
    for (j in 1:ncol(ret)) {
      ins <- x[cnum==v.cnum[i] & yr==v.year[j]]
      if (length(ins)>0) {
        ret[i, j] <- ins
      }
    }
  }
  return(ret)
}

rescale01 <- function(x, lo=NULL, hi=NULL) {
  if (is.null(lo)) lo <- min(x, na.rm=TRUE)
  if (is.null(hi)) hi <- max(x, na.rm=TRUE)
  y <- (x-lo)/(hi-lo) # maps [lo, hi] -> [0, 1]
  return(y)
}

write.est <- function(dat, bugs.model, filename) {
# Output latent variable estimates in a .csv spreadsheet
  res <- data.frame(country=dat$country, 
                    abbr=dat$abbr, 
                    ccode=dat$cow, 
                    year=dat$year, 
                    MSF=round(bugs.model$mean$x, 4), 
                    post.sd=round(bugs.model$sd$x, 4))
  write.csv(res, filename, row.names=FALSE)
  invisible()
}

## Load data and country codes
## Merge country names and abbreviations
indat <- read.csv("irt_full.csv")
ccode <- read.csv("cow2.csv")
indat <- merge(indat, ccode, all.x=T, sort=TRUE)
indat <- indat[order(indat$cow, indat$year), ]

# Remove country-years that are all NA for first/last years
datsplit <- split(indat, indat$cow)
dat <- NULL
for (i in 1:length(datsplit)) {
  yvars <- datsplit[[i]][, c('gmf', 'fh', 'rsf', 'trd_cen', 'critical', 'perspectives', 'harassment', 'selfcen', 'bias', 'corrupt')]
  first <- as.numeric(which(cumsum(!apply(is.na(yvars), 1, all))==1))
  last <- nrow(datsplit[[i]])-as.numeric(which(cumsum(rev(!apply(is.na(yvars), 1, all)))==1))+1
  if (length(first) > 1) {first <- min(first)}
  if (length(last) > 1) {last <- min(last)}
  if (length(first) > 0) {
    dat <- rbind(dat, datsplit[[i]][first:last, ])
  }
}

# Drop countries with fewer than 10 years of data, start to end
dat <- dat[!(dat$cow %in% as.numeric(names(table(dat$cow))[table(dat$cow) < 10])), ]

# Remove unused factor levels
dat$abbr <- dat$abbr[, drop=T]
dat$country <- dat$country[, drop=T]

# Make vectors of country names and abbreviations in order of dataset
cnames <- as.character(dat$country[1])
cabbr <- as.character(dat$abbr[1])
for (i in 2:nrow(dat)) {
  if (dat$country[i] != dat$country[i-1]) {
    cnames <- c(cnames, as.character(dat$country[i]))
    cabbr <- c(cabbr, as.character(dat$abbr[i]))
  }
}

# Tabulate manifest variables
if (F) {
  table(dat$gmf) 
  table(dat$fh)  
  table(dat$rsf) 
  table(dat$trd_cen)                  
  table(dat$critical)
  table(dat$perspectives)
  table(dat$harassment)
  table(dat$selfcen)          
  table(dat$bias)     
  table(dat$corrupt)
}

y1 <- dat$gmf
y2 <- dat$fh
y3 <- dat$rsf
y4 <- 1+dat$trd_cen
y5 <- 1+dat$critical
y6 <- 1+dat$perspectives
y7 <- 1+dat$harassment
y8 <- 1+dat$selfcen
y9 <- 1+dat$bias
y10 <- 1+dat$corrupt
y <- cbind(y1, y2, y3, y4, y5, y6, y7, y8, y9, y10)

# Indexing for countries and years
country <- recode(dat$cow)
year <- recode(dat$year)

# Other inits for model
J <- ncol(y)
K <- apply(y, 2, max, na.rm=TRUE)
N <- nrow(y)
Mc <- length(table(country))
coffset <- as.vector(c(1, 1+cumsum(table(country))))
koffset <- as.vector(c(1, 1+cumsum(K-1)))

inits <-  function() { list(cutp.raw=c(0.52182938, 0.78797409,                          # y1(gmf)         =1,2,3; m=2
                                       0.41919281, 0.76395998,                          # y2(fh)          =1,2,3; m=2
                                       0.34805404, 0.61663270, 0.88341737,              # y3(rsf)         =1,2,3,4; m=3 
                                       0.26359895, 0.40447624, 0.55881514, 0.86402904,  # y4(trd_cen)     =0,1,2,3,4; m=4 
                                       0.10569689, 0.35212595, 0.72214500,              # y5(critical)    =0,1,2,3; m=3
                                       0.19707552, 0.37490490, 0.61966620,              # y6(perspectives)=0,1,2,3; m=3
                                       0.08445936, 0.39741109, 0.73395824, 1.03493743,  # y7(harassment)  =0,1,2,3,4; m=4
                                       0.15550524, 0.40324347, 0.84923560,              # y8(selfcen)     =0,1,2,3; m=3
                                       0.18689146, 0.32777215, 0.45931226, 0.75926566,  # y9(bias)        =0,1,2,3,4; m=4
                                       0.18950968, 0.33261488, 0.52327017, 0.82318824), # y10(corrupt)    =0,1,2,3,4; m=4
                            beta=runif(J, 0, 10), 
                            x.sd=runif(Mc, 0, 1), 
                            x=runif(N, 0, 1)) }

jagsfit <- jags.parallel(data=c("y", "J", "K", "N", "Mc", "coffset", "koffset", "country"), 
                         inits=inits, 
                         parameters.to.save=c("beta", "cutp", "x", "x.sd"), 
                         model.file="DynamicGRM.txt", 
                         n.chains=3,
                         n.iter=2000) #number of iterations per chain
bugs.model <- jagsfit$BUGSoutput

# Save R Data
save(bugs.model, file = "bugs-3x2000-test.RData")
load("bugs-3x2000-test.RData")

# Create .csv file for MSF scores
xhat <- cymat(bugs.model$mean$x, country, year)
x.sd <- cymat(bugs.model$sd$x, country, year)
write.est(dat, bugs.model, "MSFS-estimates_full-3x2000-test.csv")

# Extract Betas (y1-10) for each manifest variable
b<-head(bugs.model$mean[1][1])
head(b)

####################################
# II. Create Figures in Manuscript #
####################################

## FIGURE 1: Observed Ratings and the Posterior Predictive Distribution
##
## Assess model fit: posterior predictive check

# Simulate data from estimates of latent values x and cutpoints on y
vars <- c("GMF", "FH:FoP", "RSF", "VDem-Trad M.Cen", "VDem-Inet Cen", "VDem-Critical.M", "VDem-M.Perspective", 
          "VDem-M.Harassment", "VDem-M.Selfcen", "VDem-M.Bias", "VDem-M.Corruption", "VDem-CM.Access")
cutp.list <- list()
for (j in 1:J) {
  cutp.list[[j]] <- bugs.model$sims.list$cutp[, koffset[j]:(koffset[j+1]-1)]
}

nsims <- 1000 # this took us ~50 mins on a small, personal laptop
resamp <- sample(c(1:bugs.model$n.sims), nsims)
sim.tab <- list(y1=NULL, y2=NULL, y3=NULL, y4=NULL, 
                y5=NULL, y6=NULL, y7=NULL, y8=NULL, 
                y9=NULL, y10=NULL)
pb <- txtProgressBar(min=1, max=nsims, style=3)
for (r in 1:nsims) {      # redraw parameter estimtes
  iter <- resamp[r]
  setTxtProgressBar(pb, r)
  for (i in 1:J) {        # loop through manifest vars
    sim.vals <- NULL
    for (j in which(!is.na(y[, i]))) {    # loop through observed country-years
      Qvec <- plogis((cutp.list[[i]][iter, ] - bugs.model$sims.list$x[iter, j])*
                       bugs.model$sims.list$beta[iter, i])
      pvec <- Qvec[1]
      for (k in 2:length(Qvec)) {
        pvec <- c(pvec, Qvec[k]-Qvec[k-1])
      }
      pvec <- c(pvec, 1-sum(pvec))
      sim.vals <- c(sim.vals, which(rmultinom(1, 1, pvec)==1))
    }
    sim.tab[[i]] <- rbind(sim.tab[[i]], table(sim.vals))
  }
}

# Generate figure 1
{
windows(width=12, height=6)
par(mfrow=c(3, 4), mar=c(4, 4, 2, 0.5), las=1)
misses.total <- 0
for (i in 1:J) {
  hist(y[, i], breaks=seq(-0.5, K[i]+0.5, 1), xlim=c(0, K[i]+1), ylim=c(0, max(table(y[, i]))*1.3), 
       col="gray", xaxt="n", xlab="Rating Level\n", ylab="Rating Frequency", main=vars[i])
  axis(1, c(1:K[i]), c(1:K[i]))
  points(c(1:K[i]), apply(sim.tab[[i]], 2, mean), pch=19)
  segments(c(1:K[i]), apply(sim.tab[[i]], 2, quantile, 0.025), c(1:K[i]), apply(sim.tab[[i]], 2, quantile, 0.975))
  cat("\n\n", i, vars[i], "\n")
  hi <- apply(sim.tab[[i]], 2, quantile, 0.975)
  lo <- apply(sim.tab[[i]], 2, quantile, 0.025)
  resmat <- round(rbind(hi, table(y[, i]), lo))
  rownames(resmat) <- c("97.5%", "actual", "2.5%")
  print(resmat)
  misses <- sum((table(y[, i]) < lo) | (table(y[, i]) > hi))
  misses.total <- misses.total+misses
  cat("misses:", misses, "\n")
  for (j in 1:K[i]) {
    segments(j-0.15, apply(sim.tab[[i]], 2, quantile, 0.025)[j], j+0.15, apply(sim.tab[[i]], 2, quantile, 0.025)[j])
    segments(j-0.15, apply(sim.tab[[i]], 2, quantile, 0.975)[j], j+0.15, apply(sim.tab[[i]], 2, quantile, 0.975)[j])
  }
}
}

# This shows the % of misses
misses.total/sum(K)

## SET-UP FOR FIGURES 2 & 4
##
## This actually creates the graphs in Appendix, Section J

{
simsort <- apply(bugs.model$sims.list$x, 2, sort)
yr <- c(min(dat$year):max(dat$year))
par(mfrow=c(5, 5), mar=c(2.5, 2, 1, 0.5), las=1, ask=T, cex.axis=0.6, cex.main=0.9) # for 40 per page in appendix
idx <- 1
for (i in order(cnames)) {
  plot(0, col="white", xlim=c(min(yr), max(yr)), ylim=c(-0.1, 1.1), 
       xlab="", ylab="", main=cnames[i], bty="n")
  sel <- dat[country==i, ]
  xax <- c(min(sel$year):max(sel$year))
  polygon(c(xax, rev(xax)), 
          c(simsort[floor(bugs.model$n.sims*0.1), coffset[i]:(coffset[i+1]-1)], 
            rev(simsort[ceiling(bugs.model$n.sims*0.9), coffset[i]:(coffset[i+1]-1)])), 
          col="gray80", border=NA)
  lines(yr, xhat[i, ], col="black", lwd=2)
  
  if (idx/40 == round(idx/40)) {
    if (saveplots) { dev.copy2pdf(file=paste("alltrends", idx/40, ".pdf", sep="")) }
  }
  idx <- idx+1
}
}

## FIGURE 2: Media System Freedom (MSF) in 194 countries, 2016: Lower/Upper Bounds
##
## Plot Scores for all countries in 2016 
##

{
plotyear <- max(year) - 1 # Change this number to indicate which year to display. For examples, 2017 - 1 = 2016
keep <- !is.na(xhat[, plotyear])
x <- xhat[keep, plotyear]
cn.keep <- cnames[keep]
o <- order(x)
ci <- NULL
for (i in 1:max(country)) {
  sims <- sort(bugs.model$sims.list$x[, country==i & year==plotyear])
  nsim <- length(sims)
  ci <- rbind(ci, c(sims[floor(0.1*nsim)], sims[ceiling(0.9*nsim)]))
}


par(mfrow = c(1,2)) #Make vertical, side-by-side figures
# Low MSF group
par(mar=c(2.5, 6.7, 0, 0), las=1)
lowgrp <- floor(sum(keep)/2)
plot(x[o][1:lowgrp], rev(1:lowgrp), xlab="", pch=19, ylab="", 
     yaxt="n", bty="n", cex.axis=0.8, xlim=c(0, 1))
segments(ci[o, 1][1:lowgrp], rev(1:lowgrp), ci[o, 2][1:lowgrp], rev(1:lowgrp), lwd=2)
axis(2, rev(1:lowgrp), cn.keep[o][1:lowgrp], cex.axis=.4, tick=T, line=-.5)

# High MSF group
par(mar=c(2.5, 6.7, 0, 0), las=1)
higrp <- lowgrp+1
plot(x[o][higrp:sum(keep)], rev(higrp:sum(keep)), xlab="", pch=19, ylab="", 
     yaxt="n", bty="n", cex.axis=0.8, xlim=c(0, 1))
segments(ci[o, 1][higrp:sum(keep)], rev(higrp:sum(keep)), ci[o, 2][higrp:sum(keep)], rev(higrp:sum(keep)), lwd=2)
axis(2, rev(higrp:sum(keep)), cn.keep[o][higrp:sum(keep)], cex.axis=.4, tick=T, line=-.5)
}

#Note: Image in Plots window will not show every country. You'll need to export the figure to see figure 2 in manuscript

# ~~~~~~~~
## FIGURE 3: Media System Freedom (MSF) Worldwide, 2016 Point Estimates
##
## Plots figure 2 data to a worldwide map
## 

#Create object w/ 2016 MSF data
msf_map<-read.csv("2016MSFMmap.csv") 

mapped_data <- joinCountryData2Map(msf_map, joinCode = "ISO3", 
                                   nameJoinColumn = "ISO3V10")

colourPalette <- brewer.pal(7,'Greys')

{
par(mai=c(0,0,0.2,0),xaxs="i",yaxs="i")
mapCountryData(mapped_data, 
               nameColumnToPlot = "msf", 
               # catMethod="fixedWidth" 
               mapTitle="",
               addLegend='FALSE',
               colourPalette = colourPalette,
               missingCountryCol = 'White')
}

## FIGURE 4: Media System Freedom (MSF) Temporal Trends in Eight Countries, 1948-2017
##
## Plot MSF scores for eight countries alongside manifest variables
##

{
windows(width=4, height=4)
par(mar=c(3, 3, 3, 1), mfrow=c(2, 4), las=1, ask=F)
yr <- c(min(dat$year):max(dat$year))
toplot <- c("Poland", "Korea, North", "United States", "Spain", "Nigeria", "Brazil", "Armenia", "Jamaica")
for (i in toplot) {
  sel <- dat[dat$country==i, ]
  idx <- which(cnames==i)
  xax <- c(min(sel$year):max(sel$year))
  plot(0, col="white", xlim=c(min(yr), max(yr)), ylim=c(-0.1, 1.1), xlab="", ylab="", 
       main=i, bty="n")
  polygon(c(xax, rev(xax)), 
          c(simsort[floor(bugs.model$n.sims*0.1), coffset[idx]:(coffset[idx+1]-1)], 
            rev(simsort[ceiling(bugs.model$n.sims*0.9), coffset[idx]:(coffset[idx+1]-1)])), 
          col="gray80", border=NA)
  lines(sel$year, rowMeans(cbind(rescale01(sel$gmf, 1, 3), 
                                 rescale01(sel$fh, 1, 3), 
                                 rescale01(sel$rsf, 1, 4), 
                                 rescale01(sel$trd_cen, 0,4),
                                 rescale01(sel$critical, 0,4),
                                 rescale01(sel$perspectives, 0,4),
                                 rescale01(sel$harassment, 0,4),
                                 rescale01(sel$selfcen, 0,4),
                                 rescale01(sel$bias, 0,4),
                                 rescale01(sel$corrupt, 0,4)
                                 
  ), na.rm=TRUE), lwd=2, col="gray40")
  lines(yr, xhat[idx, ], col="black", lwd=3)
  jit <- 0.04
  points(sel$year, rescale01(sel$gmf, 1, 3)+runif(nrow(sel), -jit, jit), col=rgb(0, 0, 0, bugs.model$mean$beta[1]/30), pch=15, cex=1.3)
  points(sel$year, rescale01(sel$fh, 1, 3)+runif(nrow(sel), -jit, jit), col=rgb(0, 0, 0, bugs.model$mean$beta[2]/30), pch=17, cex=1.3)
  points(sel$year, rescale01(sel$rsf, 1, 4)+runif(nrow(sel), -jit, jit), col=rgb(0, 0, 0, bugs.model$mean$beta[3]/30), pch=18, cex=1.3)
  points(sel$year, rescale01(sel$trd_cen, 0,4)+runif(nrow(sel), -jit, jit), col=rgb(0, 0, 0, bugs.model$mean$beta[4]/30), pch=19, cex=1.3)
  points(sel$year, rescale01(sel$critical, 0,3)+runif(nrow(sel), -jit, jit), col=rgb(0, 0, 0, bugs.model$mean$beta[5]/30), pch=19, cex=1.3)
  points(sel$year, rescale01(sel$perspectives, 0,3)+runif(nrow(sel), -jit, jit), col=rgb(0, 0, 0, bugs.model$mean$beta[6]/30), pch=19, cex=1.3)
  points(sel$year, rescale01(sel$harassment, 0,4)+runif(nrow(sel), -jit, jit), col=rgb(0, 0, 0, bugs.model$mean$beta[7]/30), pch=19, cex=1.3)
  points(sel$year, rescale01(sel$selfcen, 0,3)+runif(nrow(sel), -jit, jit), col=rgb(0, 0, 0, bugs.model$mean$beta[8]/30), pch=19, cex=1.3)
  points(sel$year, rescale01(sel$bias, 0,4)+runif(nrow(sel), -jit, jit), col=rgb(0, 0, 0, bugs.model$mean$beta[9]/30), pch=19, cex=1.3)
  points(sel$year, rescale01(sel$corrupt, 0,4)+runif(nrow(sel), -jit, jit), col=rgb(0, 0, 0, bugs.model$mean$beta[10]/30), pch=19, cex=1.3)
  if (saveplots) { dev.copy2pdf(file=paste("trendfit-", gsub(" ", "", i), ".pdf", sep="")) }
}
}

## FIGURE 5: Threshold Estimates, 10 Media Freedom Indicators
##
## Compare cutpoints of the 10 manifest variables
##

{
windows(width=12, height=6)
par(mar=c(2.5, 4, 1, 0), ask=F, las=1, mfrow=c(1, 1))
cutpoints <- bugs.model$mean$cutp
plot(0, col="white", xlab="", ylab="Threshold value", bty="n", xaxt="n", 
     xlim=c(1, length(cutpoints)-0.5), ylim=c(-0.5, 1.5))
cur <- 1
abline(v=cur-0.5, lty=3, col="gray10")
for (i in 1:J) {
  sm <- as.matrix(bugs.model$sims.list$cutp[, koffset[i]:(koffset[i+1]-1)])
  for (j in 1:ncol(sm)) {
    polygon(c(cur-0.35, cur-0.35, cur+0.35, cur+0.35), 
            c(quantile(sm[, j], 0.025), quantile(sm[, j], 0.975), quantile(sm[, j], 0.975), quantile(sm[, j], 0.025)), 
            border="gray80", col="gray80")
    segments(cur-0.35, mean(sm[, j]), cur+0.35, mean(sm[, j]), lwd=2)
    cur <- cur+1
  }
  abline(v=cur-0.5, lty=3, col="gray10")
}
axis(1, c(1.5, 3.5, 6.5, 10.5, 14, 17, 20, 23.5, 27, 30.5, 34.5, 38), 
     c("GMF\n", "FH:FoP\n", "RSF\n", "VD-Trad M.Cen\n", "VD-Inet Cen\n", "VD-Crit.M\n", "VD-M.Perspect.\n", 
       "VD-M.Harassment\n", "VD-M.Selfcen\n", "VD-M.Bias\n", "VD-M.Corruption\n", "VD-CM.Access\n"), tick=F, cex.axis=0.7)
}
